library(data.table)
library(ggplot2)
library(knitr)
library(ggrepel)
library(RColorBrewer)
library(DESeq2)
library(rnaseqGene)
library(plotly)
library(Rtsne)
library(scales)

rm(list = ls())

setDTthreads(8)

data.output.dir <- file.path(here::here(
  '..','..',
  's3-roybal-tcsl',
  'lenti_screen_compiled_data','data'))
load(file=file.path(data.output.dir, 'pooled_analysis_data.Rdata'))

# set this to true to re-run, else will load from s3
rerun_deseq <- F

DeSeq2 Computation

DESeq2 Function

run_deseq <- function(data.dt, ref_bin, test_bin, 
    control_replicates = T,                      
    interaction = T, group.control = F, weight.bins = F) 
{
  
  # identify inputs
  assay_input <- as.character(unique(data.dt[, assay]))
  k_type <- as.character(unique(data.dt[, k.type]))
  t_type <- as.character(unique(data.dt[, t.type]))
  
  ## 1. Do bin normalization weights =======
  # bin normalization weights
  data.weights <- unique(data.dt[, list(
    batch, donor, timepoint, assay, t.type, k.type, 
    sort.group, bin, bin.pct, bin.reads)])[, 
      list(bin, bin.reads, bin.pct, sort.group,
        read.weight=bin.pct * bin.reads / sum(bin.pct * bin.reads)),
      by=.(batch, donor, timepoint, assay, t.type, k.type)][, 
        read.weight.norm := read.weight/exp(mean(log(read.weight))), 
        by=.(batch, donor, timepoint, assay, t.type, k.type)]
  
  stopifnot(nrow(interaction(assay_input, k_type, t_type)) == 1)
  
  # prepare cts and coldata dataframes
  
  ## 2. Prepare Ref Bins ============
  if(length(ref_bin) == 1 & ref_bin[1] == 'baseline') {
    # reference is baseline
    # get baseline counts per donor/assay replicate
    ref.bin.dt <- dcast(
      data.dt[
        bin == 'D' & assay == assay_input &
          k.type == k_type & t.type == t_type, 
        .(
          CAR.align, 
          bin.sort.group = paste(
            batch, donor, timepoint, assay, t.type, 'base', sep = '_'), 
          k.type, t.type, batch, assay, donor, 
          sort.group, 
          bin = 'base',
          counts = baseline.counts)], 
      CAR.align ~ bin.sort.group, value.var='counts')
    
    ref.weights <- rep(1, ncol(ref.bin.dt))
    
  } else {
    # reference is a specified bin
    ref.bin.dt <- dcast(
      data.dt[
        bin %in% ref_bin & assay == assay_input &
          k.type == k_type & t.type == t_type, 
        .(
          CAR.align, 
          bin.sort.group = paste(sort.group, bin, sep = '_'), 
          k.type, t.type, batch, assay, donor, 
          sort.group, 
          bin,
          counts)], 
      CAR.align ~ bin.sort.group, value.var='counts')
    
    ref.weights <- dcast(data.weights[
        bin %in% ref_bin & assay == assay_input &
            k.type == k_type & t.type == t_type, 
        .(
          bin.sort.group = paste(sort.group, bin, sep = '_'), 
          k.type, t.type, batch, assay, donor, 
          sort.group,
          bin,
          read.weight.norm)],
        . ~ bin.sort.group, 
        value.var = 'read.weight.norm')
    
    stopifnot(nrow(ref.bin.dt) == nrow(unique(ref.bin.dt)))
  }
  
  # copy the ref bin columns for each of the test bin columns
  if (interaction == T) {
    
    num.ref.reps <- ncol(ref.bin.dt) - 1
    
    ref.bin.dt <- cbind(ref.bin.dt[, 1], 
      do.call("cbind", replicate(length(test_bin), 
      ref.bin.dt[, -1], simplify = FALSE)))
    
    names(ref.bin.dt) <- c(names(ref.bin.dt[, 1]), 
      paste(names(ref.bin.dt[, -1]), rep(test_bin, each=num.ref.reps), 
        sep = '_'))
  }
  
  ## 3. Prepare Test Bins ============
  test.bin.dt <- dcast(
      data.dt[
        bin %in% test_bin & assay == assay_input &
          k.type == k_type & t.type == t_type, 
        .(
          CAR.align, 
          bin.sort.group = paste(sort.group, bin, sep = '_'), 
          k.type, t.type, batch, assay, donor, 
          sort.group, 
          bin,
          counts)], 
      CAR.align ~ bin.sort.group, value.var='counts')
  
  # check that replicate counts match
  stopifnot(nrow(ref.bin.dt) == nrow(unique(ref.bin.dt)))
  stopifnot(nrow(test.bin.dt) == nrow(unique(test.bin.dt)))
  
  ## 4. Merge and create design matrix ============
  cts <- merge(ref.bin.dt, test.bin.dt, by = 'CAR.align')
  cts <- data.frame(cts[, -1], row.names = cts[, CAR.align])
  cts[is.na(cts)] <- 0
  
  coldata <- data.frame(
    condition = c(
      rep('reference', ncol(ref.bin.dt) - 1), 
      rep('test', ncol(test.bin.dt) - 1)), 
    rep = data.table(t(sapply(strsplit(c(
      names(ref.bin.dt)[-1], 
      names(test.bin.dt)[-1]),"_"), `[`, c(1,2))))[,
        paste(V1, V2, sep='_')],
    bin = sapply(strsplit(c(
      names(ref.bin.dt)[-1], 
      names(test.bin.dt)[-1]),"_"), `[`, 7),
    row.names = c(names(ref.bin.dt)[-1], names(test.bin.dt)[-1]))
  
  dds <- DESeqDataSetFromMatrix(countData = cts,
                                colData = coldata,
                                design =  ~ condition + rep)
  
  # set reference
  dds$condition <- relevel(dds$condition, ref = "reference")
  
  print(coldata)
  
  # pre-filtering
  keep <- rowSums(counts(dds)) >= 10
  dds <- dds[keep,]
  
  # try various designs of decreasing complexity, since we don't have a
  # full matrix for every example deseq run.
  
  # try_designs <- function(designs, dds) tryCatch({
  #   message(paste("trying design:",as.character(designs[[1]]),"\n"))
  #   DESeq2::design(dds) <- designs[[1]]
  #   return(DESeq2::DESeq(dds)) }, error= function(e) {
  #     dds <- try_designs(designs[c(-1)], dds)
  #     message(e)
  #     message('getting here\n')
  #     return(dds)
  #   }
  # )
  #     
  # try_designs(
  #   designs=c(
  #     ~ condition + condition:rep + condition:bin,
  #     ~ condition + rep + bin,
  #     ~ condition + rep,
  #     ~ condition
  #   ),
  #   dds
  # )
  
  # if (control_replicates) {
  #     design(dds) <- ~ condition + rep
  # }
  # 
  # #check unique bins before using bins as contrast
  # n_uniq_bins <- length(unique(coldata$bin))
  # 
  # if (n_uniq_bins == 1 & interaction == T) {
  #   warning('Cannot use bin contrast, only one bin level.')
  # }
  # 
  # if (interaction == T & group.control == T & n_uniq_bins > 1) {
  #   design(dds) <- ~ condition + condition:rep + condition:bin
  # } else if (interaction == T & n_uniq_bins > 1) {
  #   design(dds) <- ~ condition + rep + bin
  # }

  dds <- DESeq2::DESeq(dds)
  res <- results(dds)
  
  # shrink log fold change
  resLFC <- lfcShrink(dds, 
    coef="condition_test_vs_reference", type="apeglm")
  
  # convert to data.table
  results.dt <- as.data.table(resLFC)[, CAR.align := row.names(resLFC)]
  results.dt <- cbind(results.dt[, 6], results.dt[, -6])
  results.dt[, assay := assay_input][, k.type := k_type][, t.type := t_type]
  
  return(results.dt)
}

Run DESeq2 on all comparisons

test_ref_sets <- c(ref=list(), test=list())

# A/B/AB vs C/D/CD
test_ref_sets$ref <- c(as.list(rep('D',3)), as.list(rep('C',3)), rep(list(c('C','D')),3))
test_ref_sets$test <- rep(list('A','B',c('A','B')),3)
test_ref_sets$interaction <- as.list(rep(F, 9))

# A/B/AB/ABCD vs baseline
test_ref_sets$ref <- c(test_ref_sets$ref, as.list(rep('baseline',3)))
test_ref_sets$test <- c(test_ref_sets$test, list('A','B',c('A','B','C','D')))
test_ref_sets$interaction <- c(test_ref_sets$interaction, as.list(rep(T, 3)))

all.deseq.results.dt <- data.table()

for (set_i in seq_along(test_ref_sets$ref)) {
    
  ref_set <- test_ref_sets$ref[[set_i]]
  test_set <- test_ref_sets$test[[set_i]]
  
  ref_str <- paste0(ref_set, collapse='')
  test_str <- paste0(test_set, collapse='')
  
  inter <- test_ref_sets$interaction[[set_i]]
  
  deseq.results.dt <- read.counts[batch != 'post-cytof' & !is.na(k.type),
    {
      message(paste(c(ref_str, test_str, inter, .BY[1],"\n"), collapse= ' - '));
      tryCatch(
        run_deseq(
          data.dt = .SD, 
          ref_bin = ref_set,
          test_bin = test_set,
          interaction = inter,
          group.control = T),
        error= function(e) {message(e); return(data.table())}
      )
    },
    by = .(group)]
  
  if (nrow(deseq.results.dt) > 0) {
    deseq.results.dt[,
      `:=`(
        ref_set = ref_str,
        test_set = test_str,
        inter = inter)]
  }
  
  all.deseq.results.dt <- rbind(
    all.deseq.results.dt,
    deseq.results.dt, fill=T)
}
save(list=c('all.deseq.results.dt'),
     file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))

Plots

if (!rerun_deseq) load(
  file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))

#add back CAR scores
cols_to_add <- c('CAR.score','sort.group','donor','batch')
cols_to_join <- c('group', 'CAR.align', 'assay', 'k.type', 't.type')


all.deseq.results.dt[, padj.disp := -log10(padj)]
all.deseq.results.dt[, lfc.disp := log2FoldChange]
all.deseq.results.dt[padj.disp > 10, padj.disp := Inf]
all.deseq.results.dt[abs(lfc.disp) > 5, lfc.disp := sign(lfc.disp) * Inf]

# mask receptor names except for known ones
control_domains <- c('4-1BB','CD28')
chosen_domains <- c('BAFF-R','CD40','TACI','TNR8')
neg_domain <- c('KLRG1')

all.deseq.results.dt[, CAR.type := 'other']
all.deseq.results.dt[CAR.align %in% control_domains, CAR.type := 'control']
all.deseq.results.dt[CAR.align %in% chosen_domains, CAR.type := 'chosen']
all.deseq.results.dt[CAR.align %in% neg_domain, CAR.type := 'neg']
all.deseq.results.dt[, 
  CAR.type := factor(CAR.type,levels=c('other','control','chosen','neg'))]

make_volcanoes <- function(data.dt) {
  ggplot(data.dt, aes(
    x=lfc.disp, y=padj.disp, 
    color=CAR.type,
    label=CAR.align,
    size=CAR.type)) + 
  geom_point() +
  facet_grid(test_set + ref_set ~ t.type + assay + k.type) +
  scale_color_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
  scale_size_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c(1,3,3,3)) +
  labs(x='Log2 FC', y='-log10(P-value)', title='Assay Volcano Plots')
}

make_timeseries <- function(data.dt) {
  ggplot(data.dt, aes(
    y=lfc.disp, x=assay, 
    color=CAR.type,
    group=CAR.align,
    label=CAR.align,
    size=CAR.type)) + 
    geom_point() +
    geom_line() +
    facet_grid(t.type ~ test_set + ref_set) +
    scale_color_manual('',
      labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
      values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
    scale_size_manual('',
      labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
      values=c(0.5,1,1,1)) +
    labs(y='Log2 FC', x='Assay', title='Log fold change across assays')

}

make_cd4_cd8 <- function(data.dt) {
  ggplot(
    dcast(data.dt, 
    CAR.align + assay + k.type + ref_set + test_set + inter + CAR.type ~ t.type, 
    value.var = c("log2FoldChange", "padj.disp")), 
  aes(y=log2FoldChange_CD8, x=log2FoldChange_CD4,
      color=CAR.type,
      label=CAR.align,
      size=CAR.type)) + 
  geom_point() +
  facet_grid(test_set + ref_set ~  assay + k.type) +
  scale_color_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
  scale_size_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c(1,3,3,3)) +
  labs(x='CD4', y='CD8', title='Log fold change, CD4 vs CD8')
}

make_pos_neg <- function(data.dt) {
  ggplot(
    dcast(data.dt, 
    CAR.align + assay + t.type + ref_set + test_set + inter + CAR.type ~ k.type, 
    value.var = c("lfc.disp", "padj.disp")), 
  aes(y=lfc.disp_pos, x=lfc.disp_neg,
      color=CAR.type,
      label=CAR.align,
      size=CAR.type)) + 
  geom_point() +
  facet_grid(test_set + ref_set ~ t.type + assay) +
  scale_color_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
  scale_size_manual('',
    labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
    values=c(1,3,3,3)) +
  labs(x='CD19-', y='CD19+', title='Log fold change, CD19+ vs CD19-')
}

Replicates comparisons

Comparisons on x/y, all combos

x_group <- 'prolif2_d1_3d_CTV1_CD4_neg'
y_group <- 'prolif2_d1_3d_CTV1_CD4_pos'

cast_comparison <- function(
    comp.df, x_group, y_group, col='CAR.score', xycols=c('x','y'),
    rescale= 'combined') {
  
  message(paste(x_group, y_group, sep=', '))
  cast_groups <- dcast(
    unique(comp.df[sort.group %in% c(x_group, y_group), 
        list(CAR.align, sort.group, CAR.score)]), 
    CAR.align ~ sort.group, value.var = col)[, 
      comparison := paste(x_group, y_group, sep=' vs ')]
  names(cast_groups)[c(2,3)] <- xycols
  
  stopifnot(rescale %in% c('combined','separate'))
  
  # rescle == combined:
  # rescale both x and y to (0,1) on same scale
  xy_scalemin = cast_groups[, min(c(x,y), na.rm=T)]
  xy_scalemax = cast_groups[, max(c(x,y), na.rm=T)]
  tryCatch({
    cast_groups[, x_scaled_comb := rescale(x, from=c(min(c(x,y), na.rm=T), max(c(x,y), na.rm=T)))]
    cast_groups[, y_scaled_comb := rescale(y, from=c(min(c(x,y), na.rm=T), max(c(x,y), na.rm=T)))]
  }, error = function(e) {
    message(e)
    cast_groups[, x_scaled := NaN]
    cast_groups[, y_scaled := NaN]}
  )
    
  # rescle == separate:
  # rescale x and y to (0,1) on individual scales
  tryCatch({
    cast_groups[, x_scaled_sep := rescale(x)]
    cast_groups[, y_scaled_sep := rescale(y)]
  }, error = function(e) {
    message(e)
    cast_groups[, x_scaled := NaN]
    cast_groups[, y_scaled := NaN]}
  )
}

# remove prolif1.d2.ctv3 for now - CD4 has only 1 bin, D, 
# and KLRG1 is on top for CD19+ ...need to check this more

pos_neg_comparisons <- read.counts[grepl('CTV', assay), {
  if (length(unique(sort.group)) == 2)
    cast_comparison(
      .SD,
      unique(sort.group[k.type == 'neg']),
      unique(sort.group[k.type == 'pos']))
  },
  by=c('donor','assay','batch','t.type')][
    !(donor == 'd2' & batch == 'prolif1' & assay == 'CTV3')]
## prolif2_d1_3d_CTV1_CD4_neg, prolif2_d1_3d_CTV1_CD4_pos
## prolif2_d1_16d_CTV2_CD4_neg, prolif2_d1_16d_CTV2_CD4_pos
## prolif2_d1_24d_CTV3_CD4_neg, prolif2_d1_24d_CTV3_CD4_pos
## prolif2_d1_16d_CTV2_CD8_neg, prolif2_d1_16d_CTV2_CD8_pos
## prolif2_d1_24d_CTV3_CD8_neg, prolif2_d1_24d_CTV3_CD8_pos
## prolif1_d2_4d_CTV1_CD4_neg, prolif1_d2_4d_CTV1_CD4_pos
## prolif1_d2_14d_CTV2_CD4_neg, prolif1_d2_14d_CTV2_CD4_pos
## prolif2_d2_3d_CTV1_CD4_neg, prolif2_d2_3d_CTV1_CD4_pos
## prolif1_d2_24d_CTV3_CD4_neg, prolif1_d2_24d_CTV3_CD4_pos
## prolif1_d2_4d_CTV1_CD8_neg, prolif1_d2_4d_CTV1_CD8_pos
## prolif1_d2_14d_CTV2_CD8_neg, prolif1_d2_14d_CTV2_CD8_pos
## prolif2_d2_4d_CTV1_CD8_neg, prolif2_d2_4d_CTV1_CD8_pos
## prolif1_d2_24d_CTV3_CD8_neg, prolif1_d2_24d_CTV3_CD8_pos
## prolif2_d2_16d_CTV2_CD8_neg, prolif2_d2_16d_CTV2_CD8_pos
## prolif2_d2_24d_CTV3_CD8_neg, prolif2_d2_24d_CTV3_CD8_pos
ggplot(pos_neg_comparisons[, prolif_donor := interaction(batch, donor)], 
       aes(x=x_scaled_comb, y=y_scaled_comb, label=CAR.align)) + 
    geom_point() + 
    facet_grid(prolif_donor+t.type~assay) + 
    geom_text_repel(data=pos_neg_comparisons[, 
      prolif_donor := interaction(batch, donor)][
         CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40')]) +
    labs(x='CD19- Proliferation (+/- scaled together, per replicate)', 
         y='CD19+ Proliferation (+/- scaled together, per replicate)')

ggplot(pos_neg_comparisons[, prolif_donor := interaction(batch, donor)], 
       aes(x=x_scaled_comb/y_scaled_comb, y=y_scaled_sep, label=CAR.align)) + 
    geom_point() + 
    facet_grid(prolif_donor+t.type~assay) + 
    geom_text_repel(data=pos_neg_comparisons[, 
      prolif_donor := interaction(batch, donor)][
         CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40')]) +
    labs(x='CD19+ Proliferation (+ scaled alone, per replicate)', 
         y='CD19- : CD19+ Proliferation Ratio')

pos_neg_comparisons_melt <- melt(
  pos_neg_comparisons[, 
    xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
  '^[xy]', names(pos_neg_comparisons), perl=T, value=T)
  )[, 
    `:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)), 
    by=c('t.type','assay','CAR.align','variable')]

cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')

pos_neg_assay_avg <- dcast(
  pos_neg_comparisons_melt, cast_formula, value.var='var_mean')

pos_neg_assay_plot <- unique(
  pos_neg_assay_avg[, list(t.type, assay, xy_ratio, y_scaled_sep, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
  CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
  label.few := CAR.align]


ggplot(pos_neg_assay_plot, 
    aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) + 
  geom_point() + 
  facet_wrap(t.type ~ assay) + 
  geom_text_repel() +
  labs(
    title='Average Prolifertation and -/+ ratio across replicates',
    y='CD19+ Proliferation (scaled for replicate)', 
    x='CD19- : CD19+ Proliferation Ratio')

ggplotly(ggplot(pos_neg_assay_plot, 
     aes(x=xy_ratio, y=y_scaled_sep, label=CAR.align)) + 
  geom_point() + 
  facet_wrap(t.type ~ assay) + 
  labs(
      title='Average Prolifertation and -/+ ratio across replicates (interactive)',
      y='CD19+ Proliferation (scaled for replicate)', 
      x='CD19- : CD19+ Proliferation Ratio'))

6 panel - all CTVs, CD4s and CD8s averaged separately

ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot, 
       aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) + 
    geom_point() + 
    facet_grid(t.type ~ assay) + 
    scale_x_continuous(labels=percent) +
    geom_text_repel(size=2.5) +
    geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
    theme_minimal() +
    labs(
        title='Average Prolifertation and -/+ ratio across replicates',
        y='CD19+ Proliferation (Scaled)', 
        x='Relative Nonspecific Proliferation')

ggplot_pos_neg_assay

ggsave(here::here('..','figs','pooled','relative_prolif.pdf'), 
       ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)

2 panel - all CD4s and CD8s averaged separately

pos_neg_comparisons_melt <- melt(
  pos_neg_comparisons[, 
    xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
  '^[xy]', names(pos_neg_comparisons), perl=T, value=T)
  )[, 
    `:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)), 
    by=c('t.type','CAR.align','variable')]

cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')

pos_neg_assay_avg <- dcast(
  pos_neg_comparisons_melt, cast_formula, value.var='var_mean')

pos_neg_assay_plot <- unique(
  pos_neg_assay_avg[, list(t.type, xy_ratio, y_scaled_sep, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
  CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
  label.few := CAR.align]

ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot, 
       aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) + 
    geom_point() + 
    facet_grid(t.type ~ .) + 
    scale_x_continuous(labels=percent) +
    geom_text_repel(size=2.5) +
    geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
    theme_minimal() +
    labs(
        title='Average Prolifertation and -/+ ratio across replicates',
        y='CD19+ Proliferation (Scaled)', 
        x='Relative Nonspecific Proliferation')

##TODO Change labelled CARs to ones significant in the CD19+ panel

ggsave(here::here('..','figs','pooled','relative_prolif_t_mean.pdf'), 
       ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)

2 panel - +/- no ratio, all CD4s and CD8s averaged separately (BEST)

pos_neg_comparisons_melt <- melt(
  pos_neg_comparisons[, 
    xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
  '^[xy]', names(pos_neg_comparisons), perl=T, value=T)
  )[, 
    `:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)), 
    by=c('t.type','CAR.align','variable')]

cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')

pos_neg_assay_avg <- dcast(
  pos_neg_comparisons_melt, cast_formula, value.var='var_mean')

pos_neg_assay_plot <- unique(
  pos_neg_assay_avg[, list(t.type, x_scaled_comb, y_scaled_comb, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
  CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
  label.few := CAR.align]

# simpler plot
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot, 
       aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) + 
    geom_point() + 
    facet_grid(t.type ~ .) + 
    scale_x_continuous(expand=expansion(mult=1, add=0), limits=c(0, NA)) +
    expand_limits(x=0) +
    geom_text_repel(size=2.5) +
    geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
    theme_minimal() +
    labs(
        title='Proliferation, +/- antigen',
        y='CD19+ Proliferation (scaled per replicate)', 
        x='CD19- Proliferation (scaled per replicate)')

# with axis cuts
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot, 
       aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) + 
    geom_point() + 
    facet_grid(t.type ~ .) + 
    scale_x_continuous(expand=expansion(mult=c(0,.1), add=0), limits=c(0,NA)) +
    scale_y_continuous(limits=c(0.48, 0.95), expand=expansion(mult=c(0,0), add=0), 
      breaks=(5:10)/10, labels=c('', (6:10)/10)) +
    geom_text_repel(size=2.5) +
    geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
    labs(
        title='Proliferation, +/- antigen',
        y='CD19+ Proliferation (scaled per replicate)', 
        x='CD19- Proliferation\n(scaled per replicate)') +
    theme_minimal() +
    theme(panel.spacing.y=unit(1.5, "lines"))

ggsave(here::here('..','figs','pooled','scaled_prolif_t_mean.pdf'), 
       ggplot_pos_neg_assay, height=6, width=3.6, useDingbats=FALSE)

Dan likes this one the best and will use for Fig 2B (until someone complains again).

2 panel - CTV1 only, +/- no ratio, all CD4s and CD8s averaged separately

pos_neg_comparisons_melt <- melt(
  pos_neg_comparisons[, 
    xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
  '^[xy]', names(pos_neg_comparisons), perl=T, value=T)
  )[, 
    `:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)), 
    by=c('t.type','CAR.align','variable', 'assay')]

cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')

pos_neg_assay_avg <- dcast(
  pos_neg_comparisons_melt, cast_formula, value.var='var_mean')

pos_neg_assay_plot <- unique(
  pos_neg_assay_avg[, list(t.type, x_scaled_comb, y_scaled_comb, CAR.align, assay)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
  CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
  label.few := CAR.align]

ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot[assay == 'CTV1'], 
       aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) + 
    geom_point() + 
    facet_grid(t.type ~ .) + 
    scale_x_continuous() +
    geom_text_repel(size=2.5) +
    geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
    theme_minimal() +
    labs(
        title='Average Prolifertation and -/+ ratio across replicates',
        y='CTV1 CD19+ Proliferation (Scaled)', 
        x='CTV1 CD19- Proliferation (Scaled)')

ggsave(here::here('..','figs','pooled','scaled_prolif_t_mean_ctv1.pdf'), 
       ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)

Figure 2 Deseq2 Panels

Bin vs baseline measurements

ggplotly(make_volcanoes(all.deseq.results.dt[
  k.type == 'pos' & ref_set == 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_cd4_cd8(all.deseq.results.dt[
  k.type == 'pos' & ref_set == 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_timeseries(all.deseq.results.dt[
  k.type == 'pos' & ref_set == 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_pos_neg(all.deseq.results.dt[
  ref_set == 'baseline']), 
  tooltip = "label", session='knitr')

Interbin measurements

ggplotly(make_volcanoes(all.deseq.results.dt[
  k.type == 'pos' & ref_set != 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_cd4_cd8(all.deseq.results.dt[
  k.type == 'pos' & ref_set != 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_timeseries(all.deseq.results.dt[
  k.type == 'pos' & ref_set != 'baseline']), 
  tooltip = "label", session='knitr')

cd19+ vs cd19-

ggplotly(make_pos_neg(all.deseq.results.dt[
  ref_set == 'baseline']), 
  tooltip = "label", session='knitr')
ggplotly(make_pos_neg(all.deseq.results.dt[
  ref_set != 'baseline']), 
  tooltip = "label", session='knitr')

Figure 2 Panels

data.output.dir <- file.path(here::here(
  '..','..',
  's3-roybal-tcsl',
  'lenti_screen_compiled_data','data'))

load(
  file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))


all.deseq.results.dt[, padj.disp := -log10(padj)]
all.deseq.results.dt[, lfc.disp := log2FoldChange]
all.deseq.results.dt[padj.disp > 10, padj.disp := Inf]
all.deseq.results.dt[abs(lfc.disp) > 5, lfc.disp := sign(lfc.disp) * Inf]

data.dt <- all.deseq.results.dt

data.dt[, CAR.type := 'other']
data.dt[CAR.align == '4-1BB', CAR.type := '4-1BB']
data.dt[CAR.align == 'CD28', CAR.type := 'CD28']
data.dt[, CAR.type := factor(
  CAR.type,levels=c('other','4-1BB','CD28'))]

Interbin Volcano

#CD28 and 4-1BB Colors
receptor_cols <- RColorBrewer::brewer.pal(9, 'YlGnBu')[c(6,8)]

# label significant hits
data.dt[, CAR.type.size := CAR.type]
data.dt[CAR.type == 'other' & 
    ((padj < 0.05 & log2FoldChange > 0) | 
    (padj < 0.05 & log2FoldChange > 0)), 
  CAR.type.size := 'other_sig']

data.dt[, CAR.label.sig := '']
data.dt[CAR.type.size != 'other',
    CAR.label.sig := CAR.align]

# plot subset - CTV1, CTV2, and cumulative of all 3
data.subset.dt <- data.dt[
  k.type == 'pos' & (
      (ref_set == 'baseline' & test_set == 'ABCD' & assay == 'CTV3') | 
      (ref_set == 'D' & test_set == 'A' & assay != 'CTV3'))]

# rename facets
data.subset.dt[, assay := factor(assay, 
    labels=c('Initial Proliferation\n(d0-d3/d4)', 
             'Proliferation after 1 week\n(d8-d16)', 
             'Cumulative Proliferation\n(d0-d24)'))]

interbin_volcano <- ggplot(data.subset.dt, 
  aes(
    x=log2FoldChange, y=-log10(padj), 
    color=CAR.type,
    label=CAR.label.sig,
    size=CAR.type.size)) + 
  geom_point() +
  facet_grid(t.type ~ assay) +
  scale_color_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28'),
    values=c('grey50', receptor_cols),
    guide=F) +
  scale_size_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
    values=c(0.8,3,3,2.5), guide=F) +
  geom_hline(yintercept=-log10(0.05), linetype = 'dashed') +
  labs(x='Relative Proliferation,\nLog2 Fold Change from Mean', 
       y='-log10(P-value)') + 
  scale_y_continuous(limits=c(0, 22)) +
  geom_text_repel() +
  theme_minimal() +
  theme(panel.border=element_rect(fill=NA))
  
ggsave(here::here('..','figs','pooled','interbin_volcano.pdf'), 
       interbin_volcano, height=4, width=7, useDingbats=FALSE)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_text_repel).

CD4 vs CD8s

cast_4v8 <- dcast(data.dt[
      k.type == 'pos' & 
        ((ref_set == 'baseline' & test_set == 'ABCD' & assay != 'CTV1') | 
        (ref_set == 'D' & test_set == 'A' & assay == 'CTV1'))], 
      CAR.align + assay + CAR.type ~ t.type, 
    value.var = c("log2FoldChange", "padj.disp"))
    
# rename facets
cast_4v8[, assay := factor(assay, 
    labels=c('Initial Proliferation\n(d0-d3/d4)', 
             'Cumulative Proliferation\n(d0-d16)', 
             'Cumulative Proliferation,\n(d0-d24)'))]

# label significant hits
cast_4v8[, CAR.type.size := CAR.type]
cast_4v8[CAR.type == 'other' & 
    ((-padj.disp_CD4 < 0.05 & log2FoldChange_CD4 > 0) | 
    (-padj.disp_CD8 < 0.05 & log2FoldChange_CD8 > 0)), 
  CAR.type.size := 'other_sig']

prolif_4v8 <- ggplot(cast_4v8, 
  aes(y=log2FoldChange_CD8, x=log2FoldChange_CD4,
      color=CAR.type,
      label=CAR.align,
      size=CAR.type.size)) + 
  geom_point() +
  facet_grid(. ~  assay) +
  scale_color_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28'),
    values=c('grey50', receptor_cols)) +
  scale_size_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
    values=c(0.8,3,3,2)) +
  geom_abline(linetype='dashed') +
  theme_minimal() + 
  labs(x='CD4, relative log2 fold change', 
       y='CD8, relative log2 fold change') +
  theme(panel.border=element_rect(fill=NA)) +
  guides(size=F)
  
ggsave(here::here('..','figs','pooled','mixed_4v8.pdf'), prolif_4v8,
       height=2.5, width=7)

CD19+ vs CD19-

cast_posneg <- dcast(data.dt[
      ((ref_set == 'baseline' & test_set == 'ABCD' & assay != 'CTV1') | 
      (ref_set == 'D' & test_set == 'A' & assay == 'CTV1'))], 
    CAR.align + assay + CAR.type + t.type ~ k.type, 
  value.var = c("log2FoldChange", "padj.disp"))
    
# rename facets
cast_posneg[, assay := factor(assay, 
    labels=c('Initial Proliferation\n(d0-d3/d4)', 
             'Cumulative Proliferation\n(d0-d16)', 
             'Cumulative Proliferation,\n(d0-d24)'))]

# label significant hits
cast_posneg[, CAR.type.size := CAR.type]
cast_posneg[CAR.type == 'other' & (
    (2^(-padj.disp_pos) < 0.05 & log2FoldChange_pos > 0) | 
    (2^(-padj.disp_neg) < 0.05 & log2FoldChange_neg > 0)
  ), 
  CAR.type.size := 'other_sig']

prolif_posneg <- ggplot(cast_posneg, 
  aes(y=log2FoldChange_pos, x=log2FoldChange_neg,
      color=CAR.type,
      label=CAR.align,
      size=CAR.type.size)) + 
  geom_point() +
  facet_grid(t.type ~  assay) +
  scale_color_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28'),
    values=c('grey50', receptor_cols)) +
  scale_size_manual('',
    labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
    values=c(0.8,3,3,2)) +
  geom_abline(linetype='dashed') +
  theme_minimal() + 
  labs(y='Relative log2 fold change, with antigen', 
       x='Relative log2 fold change, no antigen') +
  theme(panel.border=element_rect(fill=NA)) +
  guides(size=F)
  
ggsave(here::here('..','figs','pooled','mixed_posneg.pdf'), prolif_posneg,
       height=4.5, width=7)